Some intro text?
Load the required packages.
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
-- Attaching packages ----------------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
v ggplot2 3.3.3 v purrr 0.3.4
v tibble 3.0.6 v dplyr 1.0.4
v tidyr 1.1.2 v stringr 1.4.0
v readr 1.4.0 v forcats 0.5.1
-- Conflicts -------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
library(janitor)
Attache Paket: ‘janitor’
The following objects are masked from ‘package:stats’:
chisq.test, fisher.test
library(ggthemes)
library(plotly)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Attache Paket: ‘plotly’
The following object is masked from ‘package:ggplot2’:
last_plot
The following object is masked from ‘package:stats’:
filter
The following object is masked from ‘package:graphics’:
layout
library(hexbin)
library(viridis)
Lade nötiges Paket: viridisLite
library(corrplot)
corrplot 0.84 loaded
Read the data from the CSV file and assign it to a variable (a tibble) named ‘data’.
# Read the meteor strike CSV and pass it through janitor's clean_names();
# the resulting table is stored in `data` and used by everything below.
data <- clean_names(read_csv('../data/MeteorStrikesDataSet1.csv'))
-- Column specification ----------------------------------------------------------------------------------------------------------------------------------
cols(
place = col_character(),
year = col_double(),
mass_g = col_double(),
longitude = col_double(),
latitude = col_double(),
fell_found = col_character()
)
Take a first look at the data with head, tail and summary.
# Preview the first 50 rows, the trailing rows, and per-column summaries.
data %>% head(50)
data %>% tail()
data %>% summary()
place year mass_g longitude latitude fell_found
Length:34065 Min. :-600 Min. : 0 Min. :-165.43 Min. :-87.37 Length:34065
Class :character 1st Qu.:1981 1st Qu.: 6 1st Qu.: 26.00 1st Qu.:-76.90 Class :character
Mode :character Median :1990 Median : 25 Median : 55.25 Median :-72.00 Mode :character
Mean :1987 Mean : 17084 Mean : 70.54 Mean :-48.05
3rd Qu.:2000 3rd Qu.: 176 3rd Qu.: 159.26 3rd Qu.: 15.37
Max. :2012 Max. :60000000 Max. : 178.20 Max. : 81.17
NA's :142
# Per-column summary restricted to rows whose mass_g is below 1000.
summary(filter(data, mass_g < 1000))
place year mass_g longitude latitude fell_found
Length:30258 Min. :-600 Min. : 0.010 Min. :-165.43 Min. :-87.37 Length:30258
Class :character 1st Qu.:1984 1st Qu.: 4.503 1st Qu.: 35.67 1st Qu.:-79.68 Class :character
Mode :character Median :1990 Median : 18.080 Median : 56.70 Median :-72.77 Mode :character
Mean :1990 Mean : 91.143 Mean : 78.29 Mean :-55.43
3rd Qu.:2000 3rd Qu.: 84.823 3rd Qu.: 159.67 3rd Qu.:-71.50
Max. :2012 Max. :999.900 Max. : 175.00 Max. : 70.73
NA's :111
# Collect the rows whose year is missing, then print them.
missingYear <- filter(data, is.na(year))
missingYear
Look at the distribution of the numerical data.
# Histogram of the year column (90 bins), shown as an interactive plotly widget.
histogramYears <- data %>%
  ggplot(mapping = aes(x = year)) +
  geom_histogram(bins = 90) +
  labs(title = 'distribution of meteors per year') +
  theme_fivethirtyeight()
ggplotly(histogramYears)
Removed 142 rows containing non-finite values (stat_bin).
# Normal Q-Q plots of year: first for all rows, then for the subsets with
# year > 1800 and year > 1960 (both subsets are kept for later use).
data %>% pull(year) %>% qqnorm()
data1800y <- filter(data, year > 1800)
data1800y %>% pull(year) %>% qqnorm()
data1960y <- filter(data, year > 1960)
data1960y %>% pull(year) %>% qqnorm()
# Bar chart of row counts per year for the year > 1960 subset, made interactive.
barplot1960 <- data1960y %>%
  ggplot(mapping = aes(x = year)) +
  geom_bar() +
  theme_fivethirtyeight()
ggplotly(barplot1960)
#histogram1980 <- ggplot(data1980y, aes(x = year)) +
# geom_histogram(bins = 90) +
# ggtitle('distribution of meteors per year') +
# theme_fivethirtyeight()
#ggplotly(histogram1980)
# Histogram of mass_g over the full data set (90 bins), followed by a normal
# Q-Q plot of the same column.
histogramMass <- data %>%
  ggplot(mapping = aes(x = mass_g)) +
  geom_histogram(bins = 90) +
  labs(title = 'distribution of mass_g of meteorites') +
  theme_fivethirtyeight()
ggplotly(histogramMass)
data %>% pull(mass_g) %>% qqnorm()
# Repeat the Q-Q plot and histogram on rows with mass_g below 1e+04.
# NOTE(review): the name `data1000g` suggests a 1000 g cut-off, but the filter
# uses 1e+04 -- confirm which threshold is intended.
data1000g <- filter(data, mass_g < 1e+04)
qqnorm(data1000g[["mass_g"]])
histogram1000g <- data1000g %>%
  ggplot(mapping = aes(x = mass_g)) +
  geom_histogram(bins = 90) +
  labs(title = 'distribution of mass_g of meteorites') +
  theme_fivethirtyeight()
ggplotly(histogram1000g)
# Same pair of plots for rows with mass_g below 176.
data176g <- filter(data, mass_g < 176)
qqnorm(data176g[["mass_g"]])
histogram176g <- data176g %>%
  ggplot(mapping = aes(x = mass_g)) +
  geom_histogram(bins = 90) +
  labs(title = 'distribution of mass_g of meteorites') +
  theme_fivethirtyeight()
ggplotly(histogram176g)
# Same pair of plots for rows with mass_g below 25.
data25g <- filter(data, mass_g < 25)
qqnorm(data25g[["mass_g"]])
histogram25g <- data25g %>%
  ggplot(mapping = aes(x = mass_g)) +
  geom_histogram(bins = 90) +
  labs(title = 'distribution of mass_g of meteorites') +
  theme_fivethirtyeight()
ggplotly(histogram25g)
# Histogram of mass_g on a log10 x axis (70 bins) over the full data set.
histogramLogMass <- data %>%
  ggplot(mapping = aes(x = mass_g)) +
  geom_histogram(bins = 70) +
  scale_x_log10() +
  labs(title = 'distribution of mass_g of meteorites') +
  theme_fivethirtyeight()
ggplotly(histogramLogMass)
# Bar chart of row counts per fell_found category, with an empty x-axis label.
data %>%
  ggplot(mapping = aes(x = fell_found)) +
  geom_bar() +
  labs(title = 'comparison of found and fell meteorites', x = '') +
  theme_fivethirtyeight()
# Longitude (x) against latitude (y) as a scatter plot with fully opaque points.
longOlat <- ggplot(data, aes(x = longitude, y = latitude)) +
  geom_point(alpha = 1)
longOlat
#ggplotly(longOlat)
# Same scatter with alpha = 1/20, so overlapping points build up darker areas.
longOlatAlp <- ggplot(data, aes(x = longitude, y = latitude)) +
  geom_point(alpha = 1 / 20)
longOlatAlp
#ggplotly(longOlatAlp)
# Hexagonal binning of the same coordinates with a viridis fill scale,
# rendered interactively.
longOlatHex <- ggplot(data) +
  geom_hex(aes(x = longitude, y = latitude)) +
  scale_fill_viridis()
ggplotly(longOlatHex)
# Scatter plot of mass_g against year for the full data set.
massYear <- ggplot(data, aes(x = year, y = mass_g)) +
  geom_point(alpha = 1)
massYear
#ggplotly(massYear)
# Year-vs-mass scatter on two progressively tighter subsets.
# NOTE(review): the name `data1980y1e4m` matches neither filter below
# (year > 1800 / mass_g < 1e6, then year > 1940 / mass_g < 1e4), and both it
# and `massYearSmall` are overwritten by the second pass -- confirm whether
# distinct names were intended.

# First pass: year > 1800 and mass_g < 1e6.
data1980y1e4m <- filter(data, year > 1800, mass_g < 1e6)
massYearSmall <- ggplot(data1980y1e4m, aes(x = year, y = mass_g)) +
  geom_point(alpha = 1 / 10)
massYearSmall
#ggplotly(massYear)

# Second pass: year > 1940 and mass_g < 1e4.
data1980y1e4m <- filter(data, year > 1940, mass_g < 1e4)
massYearSmall <- ggplot(data1980y1e4m, aes(x = year, y = mass_g)) +
  geom_point(alpha = 1 / 10)
massYearSmall
#ggplotly(massYear)
# Correlation matrix of the numeric columns, drawn with corrplot.
# cor() yields NA for every pair that involves a column containing missing
# values unless `use` is set, which would blank out those cells in the plot
# (year has missing entries in this data set). Restricting the computation to
# complete rows gives a fully populated matrix.
# select(where(is.numeric)) replaces the superseded select_if(is.numeric).
data %>%
  select(where(is.numeric)) %>%
  cor(use = "complete.obs") %>%
  corrplot(method = "square")
# Print the rows whose fell_found value is 'Fell'.
filter(data, fell_found == 'Fell')
# Print the rows with year 2000 or later.
filter(data, year >= 2000)
# Hex-bin density of the coordinates inside three hand-picked windows.
# All bounds are strict inequalities, as in a chain of separate filters.

# Window 1: longitude > 165 and latitude < -80.
ggplot(filter(data, longitude > 165, latitude < -80)) +
  geom_hex(aes(x = longitude, y = latitude)) +
  scale_fill_viridis()

# Window 2: 0 < longitude < 50 and latitude < -50.
ggplot(filter(data, longitude < 50, longitude > 0, latitude < -50)) +
  geom_hex(aes(x = longitude, y = latitude)) +
  scale_fill_viridis()

# Window 3: 35.6 < longitude < 35.75 and -72 < latitude < -71.
ggplot(
  filter(
    data,
    longitude < 35.75,
    longitude > 35.6,
    latitude < -71,
    latitude > -72
  )
) +
  geom_hex(aes(x = longitude, y = latitude)) +
  scale_fill_viridis()